This document describes a few data visualizations that can be applied to a dataset containing two numeric variables, one of which is ordered (here, a date). As an example we will consider the evolution of the Bitcoin price over time. The dataset is loaded from the data-to-viz GitHub repository. It looks like this:

# Libraries
library(tidyverse)
library(hrbrthemes)
library(DT)
library(plotly)

# Load the example dataset from the data-to-viz GitHub repository.
# `header = TRUE` is spelled out in full: `T` is an ordinary variable that can
# be reassigned, whereas `TRUE` is a reserved constant.
data <- read.table(
  "https://raw.githubusercontent.com/holtzy/data_to_viz/master/Example_dataset/2_TwoNumOrdered.csv",
  header = TRUE
)

# Convert the `date` column to class Date; the plots below map it to the x axis.
data$date <- as.Date(data$date)

# Preview the first 5 rows of the dataset as an HTML table.
# `dom = "t"` asks DataTables to render only the table itself, and column
# ordering and row names are switched off. `FALSE` is written in full because
# `F` is a reassignable variable, not a reserved constant.
data %>%
  head(5) %>%
  datatable(
    options = list(dom = "t", ordering = FALSE),
    rownames = FALSE,
    width = "30px"
  )

1 - Line plot


# Line chart: `value` against `date`, drawn as a single coloured line.
ggplot(data, aes(x = date, y = value)) +
  geom_line(colour = "#69b3a2") +
  labs(title = "Evolution of Bitcoin price", y = "bitcoin price ($)") +
  theme_ipsum()

2 - Area chart


# Area chart: a half-transparent filled area with the line drawn on top of it
# in the same colour.
ggplot(data, aes(x = date, y = value)) +
  geom_area(fill = "#69b3a2", alpha = 0.5) +
  geom_line(colour = "#69b3a2") +
  labs(title = "Evolution of Bitcoin price", y = "bitcoin price ($)") +
  theme_ipsum()

3 - Interactive area chart


# Build the area chart, keep it in `p`, then hand it to plotly to obtain an
# interactive version of the same figure.
p <- ggplot(data, aes(x = date, y = value)) +
  geom_area(fill = "#69b3a2", alpha = 0.5) +
  geom_line(colour = "#69b3a2") +
  labs(title = "Evolution of Bitcoin price", y = "bitcoin price ($)") +
  theme_ipsum()
ggplotly(p)

4 - Few points? Use connected scatter


# Connected scatter: restrict the data to its last 10 rows and add a point
# marker for every observation on top of the area and line.
ggplot(tail(data, 10), aes(x = date, y = value)) +
  geom_area(fill = "#69b3a2", alpha = 0.5) +
  geom_line(colour = "#69b3a2") +
  geom_point() +
  labs(title = "Evolution of Bitcoin price", y = "bitcoin price ($)") +
  theme_ipsum()

5 - To cut or not to cut?


# Same last-10-rows chart, but the filled band is a ribbon spanning from a
# fixed lower bound of 8000 up to `value`, with no visible outline.
ggplot(tail(data, 10), aes(x = date, y = value)) +
  geom_ribbon(
    aes(ymin = 8000, ymax = value),
    fill = "#69b3a2",
    colour = "transparent",
    alpha = 0.5
  ) +
  geom_line(colour = "#69b3a2") +
  geom_point() +
  labs(title = "Evolution of Bitcoin price", y = "bitcoin price ($)") +
  theme_ipsum()

6 - Comparing to a limit


# Compare the last 300 rows to a limit of 7500: each row is flagged "yes" when
# `value` exceeds the limit and "no" otherwise, and that flag drives the fill
# of a ribbon drawn between the limit and `value`.
tail(data, 300) %>%
  mutate(mycolor = ifelse(value > 7500, "yes", "no")) %>%
  ggplot(aes(x = date, y = value)) +
  geom_ribbon(
    aes(ymin = 7500, ymax = value, fill = mycolor),
    colour = "transparent",
    alpha = 0.5
  ) +
  geom_line(colour = "#69b3a2") +
  labs(title = "Evolution of Bitcoin price", y = "bitcoin price ($)") +
  theme_ipsum()

 

A work by Yan Holtz for data-to-viz.com